import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
import matplotlib
# Use the SimHei font family for all figure text; the chart titles and axis
# labels in this script are written in Chinese.
matplotlib.rcParams['font.family'] = 'SimHei'

# Load the CSV and cast both number columns (front zone / back zone) to text
# in one step, so every cell can later be split on whitespace.
df = pd.read_csv('data_100.csv', encoding='utf-8').astype(
    {'前区号码': str, '后区号码': str}
)

# Flatten whitespace-separated number strings into one list of tokens
def extract_numbers(series) -> list:
    """Return every numeric token found in *series* as one flat list.

    Each element of *series* is expected to be a string of
    whitespace-separated numbers (for example ``"01 05 12"``).  Tokens are
    returned unchanged, as strings, in the order they are encountered.

    Elements that are not strings (``None``, float NaN) and tokens that do
    not consist solely of decimal digits (such as the ``"nan"`` text that
    ``astype(str)`` produces for a missing cell) are skipped, so every
    returned token can safely be passed to ``int()``.

    Args:
        series: Any iterable of cell values (a pandas Series, a list, ...).

    Returns:
        A list of digit-only string tokens; empty when nothing qualifies.
    """
    numbers = []
    for entry in series:
        # A missing cell that was never cast to text has no .split(); skip it
        # instead of raising AttributeError.
        if not isinstance(entry, str):
            continue
        numbers.extend(token for token in entry.split() if token.isdecimal())
    return numbers

front_nums = extract_numbers(df['前区号码'])
back_nums = extract_numbers(df['后区号码'])

# Tally how often each number token occurs in each zone.
front_freq = Counter(front_nums)
back_freq = Counter(back_nums)

# Rebuild each tally as a plain dict whose keys run in ascending numeric
# order (the keys are strings, so they are compared through int()).
front_sorted = {num: front_freq[num] for num in sorted(front_freq, key=int)}
back_sorted = {num: back_freq[num] for num in sorted(back_freq, key=int)}

def _plot_frequency(freq, title, xlabel, figsize, color):
    """Show a bar chart of occurrence counts, one bar per number.

    Args:
        freq: Mapping of number label -> occurrence count. Bars are drawn in
            the mapping's iteration order.
        title: Chart title.
        xlabel: Label for the x axis.
        figsize: ``(width, height)`` of the figure, passed to ``plt.figure``.
        color: Bar colour.
    """
    plt.figure(figsize=figsize)
    # Pass plain lists rather than dict views so the call does not depend on
    # Matplotlib converting the views itself.
    plt.bar(list(freq.keys()), list(freq.values()), color=color)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel('出现次数')
    plt.grid(True)
    plt.tight_layout()
    plt.show()


# Front-zone chart
_plot_frequency(
    front_sorted,
    title='前区号码出现频率（按号码升序）',
    xlabel='前区号码',
    figsize=(10, 4),
    color='skyblue',
)

# Back-zone chart
_plot_frequency(
    back_sorted,
    title='后区号码出现频率（按号码升序）',
    xlabel='后区号码',
    figsize=(8, 3),
    color='orange',
)

# Recommendation strategy: pick the most frequent numbers of each zone.
FRONT_PICK_COUNT = 5  # how many front-zone numbers to recommend
BACK_PICK_COUNT = 2   # how many back-zone numbers to recommend

# most_common() orders equal counts by first occurrence, so ties are resolved
# by the order in which the numbers appear in the data. The picks are then
# sorted in ascending numeric order for display.
top_front = sorted(int(num) for num, _ in front_freq.most_common(FRONT_PICK_COUNT))
top_back = sorted(int(num) for num, _ in back_freq.most_common(BACK_PICK_COUNT))

# Format every pick as a two-digit, zero-padded number separated by spaces.
recommended_front = ' '.join(f"{n:02d}" for n in top_front)
recommended_back = ' '.join(f"{n:02d}" for n in top_back)

# Print the recommendation (the header has no placeholders, so it is a plain
# string rather than an f-string).
print("推荐大乐透号码（2025年7月2日）：")
print(f"前区：{recommended_front}")
print(f"后区：{recommended_back}")